Lab 2: time series patterns/graphics

time plot

## You can create a time plot using the autoplot() function

# Time plot of the daily data
arrival_daily %>%
  feasts::autoplot()
## Plot variable not specified, automatically selected `.vars = arrival`
## `mutate_if()` ignored the following grouping variables:

# Time plot of the hourly data
arrival_hourly %>%
  autoplot()
## Plot variable not specified, automatically selected `.vars = arrival`

# The same two plots for the total_arrival_* series
total_arrival_hourly %>%
  feasts::autoplot()
## Plot variable not specified, automatically selected `.vars = arrival`

total_arrival_daily %>%
  autoplot()
## Plot variable not specified, automatically selected `.vars = arrival`

You can use filter_index(), or head() and tail(), to select a subset of a time series; try ?filter_index or search online for more details.

# A single period string keeps only that period: February 2016
total_arrival_daily %>%
  tsibble::filter_index("2016-02") %>%
  autoplot(arrival)

# `start ~ .` keeps everything from February 2016 until the end of the series
total_arrival_daily %>%
  tsibble::filter_index("2016-02" ~ .) %>%
  autoplot(arrival)

# `. ~ end` keeps everything from the start of the series up to February 2016
total_arrival_daily %>%
  tsibble::filter_index(. ~ "2016-02") %>%
  autoplot(arrival)

# A single day of the hourly series: 2015-01-01
total_arrival_hourly %>%
  tsibble::filter_index("2015-01-01") %>%
  autoplot(arrival)
## Warning: Argument 'roll' is deprecated. Deprecated in version '1.8.4'.

## Warning: Argument 'roll' is deprecated. Deprecated in version '1.8.4'.

# First 100 observations
total_arrival_daily %>%
  head(n = 100) %>%
  autoplot()
## Plot variable not specified, automatically selected `.vars = arrival`

# Last 100 observations
total_arrival_daily %>%
  tail(n = 100) %>%
  autoplot()
## Plot variable not specified, automatically selected `.vars = arrival`

Hourly or daily data can be really hard to visualise. One possibility is to view only a portion of the data; you can also use interactive plots:

# Interactive plot of the daily series: convert it with tsbox::ts_xts(), pass
# the result to dygraph(), and add a range selector that opens on dateWindow.
# NOTE(review): the initial window is January 2010, while the other examples
# in this lab subset 2015-2016 -- confirm the series actually covers 2010.
tsbox::ts_xts(total_arrival_daily) %>% 
  dygraph() %>% 
  dyRangeSelector(dateWindow = c("2010-01-01", "2010-02-01"))
## [time]: 'date' [value]: 'arrival'
# Same for the hourly series, with the range selector opening on a one-day window.
tsbox::ts_xts(total_arrival_hourly) %>% 
  dygraph() %>% 
  dyRangeSelector(dateWindow = c("2010-01-01", "2010-01-02"))
## [value]: 'arrival'

You can also plot monthly, weekly or quarterly time series using the ggplot2 package; see below for an example.

# Aggregate the daily arrivals into monthly totals
monthly_admissions <- total_arrival_daily %>%
  index_by(month = yearmonth(date)) %>%
  summarise(arrival = sum(arrival))

# Time plot of the monthly totals with custom axis labels and titles
monthly_admissions %>%
  autoplot(arrival) +
  labs(
    title = "Monthly A&E arrival",
    subtitle = "UK hospital",
    x = "Month",
    y = "arrival"
  )

Seasonal plots

Use seasonal and subseries plots to check whether a series contains seasonality.

# Seasonal plot of the daily series
total_arrival_daily %>%
  feasts::gg_season(arrival)

# Subseries plot of the daily series
total_arrival_daily %>%
  feasts::gg_subseries(arrival)

You can also try this with the hourly series; change the "period = ..." argument to look at different seasonal periods.

# Hourly series, seasonal period of one day
total_arrival_hourly %>%
  feasts::gg_season(arrival, period = "day")

# Daily series, seasonal period of one week
total_arrival_daily %>%
  feasts::gg_season(arrival, period = "week")

# Hourly series again, with the period changed to one week
total_arrival_hourly %>%
  gg_season(arrival, period = "week")

Is there any seasonality in the daily time series? What about the hourly and monthly series?

How do you create a seasonal plot for the weekly, monthly and quarterly series?

# Weekly totals, then their seasonal and subseries plots
weekly_admissions <- total_arrival_daily %>%
  index_by(week = yearweek(date)) %>%
  summarise(arrival = sum(arrival))
weekly_admissions %>%
  gg_season()
## Plot variable not specified, automatically selected `y = arrival`

weekly_admissions %>%
  gg_subseries()
## Plot variable not specified, automatically selected `y = arrival`

# Seasonal and subseries plots of the monthly totals
monthly_admissions %>%
  gg_season()
## Plot variable not specified, automatically selected `y = arrival`

monthly_admissions %>%
  gg_subseries()
## Plot variable not specified, automatically selected `y = arrival`

# Quarterly totals, then their seasonal and subseries plots
quarterly_admissions <- total_arrival_daily %>%
  index_by(quarter = yearquarter(date)) %>%
  summarise(arrival = sum(arrival))
quarterly_admissions %>%
  gg_season()
## Plot variable not specified, automatically selected `y = arrival`

quarterly_admissions %>%
  gg_subseries()
## Plot variable not specified, automatically selected `y = arrival`

autocorrelation plot

# Lag plots for 14 lags, from 1 to 14
total_arrival_daily %>%
  feasts::gg_lag(arrival, lags = 1:14, geom = "point")

# Autocorrelation function for 14 lags
total_arrival_daily %>%
  feasts::ACF(lag_max = 14)
## Response variable not specified, automatically selected `var = arrival`
## # A tsibble: 14 x 2 [1D]
##         lag    acf
##    <cf_lag>  <dbl>
##  1       1D 0.428 
##  2       2D 0.177 
##  3       3D 0.173 
##  4       4D 0.162 
##  5       5D 0.166 
##  6       6D 0.323 
##  7       7D 0.465 
##  8       8D 0.292 
##  9       9D 0.0905
## 10      10D 0.0919
## 11      11D 0.0820
## 12      12D 0.0734
## 13      13D 0.246 
## 14      14D 0.390

plot the autocorrelation

# ACF plot of the hourly series, up to lag 48
total_arrival_hourly %>%
  ACF(lag_max = 48) %>%
  autoplot()
## Response variable not specified, automatically selected `var = arrival`

# ACF plot of the daily series, up to lag 14
total_arrival_daily %>%
  ACF(lag_max = 14) %>%
  autoplot()
## Response variable not specified, automatically selected `var = arrival`

You can have a time plot, an ACF plot and a seasonal plot in one single display if you use the gg_tsdisplay() function:

# gg_tsdisplay() is called on one series at a time here; see ?gg_tsdisplay.
# Not run -- variant that filters the daily series by gender as well
# (NOTE(review): confirm total_arrival_daily has a gender column):
# total_arrival_daily %>% filter(gender == "female") %>% gg_tsdisplay()

# Hourly series, restricted to gender == "female"
total_arrival_hourly %>% filter(gender == "female") %>%  gg_tsdisplay()#  check ?gg_tsdisplay
## Plot variable not specified, automatically selected `y = arrival`

# Daily series, no filtering
total_arrival_daily  %>% gg_tsdisplay()#  check ?gg_tsdisplay
## Plot variable not specified, automatically selected `y = arrival`

You can use the Ljung-Box test to check whether the autocorrelation is significant: if the p-value is small (well below 0.05), there is significant autocorrelation:

# Ljung-Box statistic and p-value for the daily arrivals
total_arrival_daily %>%
  features(arrival, ljung_box)
## # A tibble: 1 × 2
##   lb_stat lb_pvalue
##     <dbl>     <dbl>
## 1    419.         0

What does autocorrelation tell us? Which key features can be highlighted by the ACF?

Make any graph using ggplot2 (optional)

You can create any graph that helps you to better understand the data! I recommend looking at the distributions of your variables: try geom_boxplot(), geom_histogram() and geom_density(), which are helpful for understanding the variation.

Here I tried to see whether the arrival of males or females differs at the weekend compared to weekdays.

# Compare hourly female arrivals on weekdays versus weekends.
# The female rows are selected first, so the aggregation and the date parts
# are only computed for the rows that end up in the plot.
weekend_an_weekday <- total_arrival_hourly %>%
  filter(gender == "female") %>%
  group_by(gender) %>%
  summarise(arrival = sum(arrival)) %>%
  mutate(
    Date = lubridate::as_date(time),
    hour = lubridate::hour(time),
    Day = lubridate::wday(time, label = TRUE),
    # Day labels follow the session locale, so matching them against
    # "Sat"/"Sun" breaks outside English locales. Use the day number instead
    # (week_start = 1: Monday = 1, ..., Saturday = 6, Sunday = 7).
    Weekend = lubridate::wday(time, week_start = 1) >= 6
  )

# One line per calendar date; panels split by the Weekend flag (FALSE / TRUE)
weekend_an_weekday %>%
  ggplot(aes(x = hour, y = arrival)) +
  geom_line(aes(group = Date)) +
  facet_grid(Weekend ~ ., scales = "free_y")